Adiba Gaffar

Used additional assistance from lectures, the class textbook, Plotly, the "pandas cheat sheet" from the Teams chat, the general HW chat, ChatGPT, and "Tech Know How" on YouTube to complete this file.

In [4]:
import pandas as pd
import plotly.express as px
import numpy as np
In [5]:
import plotly.graph_objects as go
In [6]:
# Load the kirby127a 283-label stats CSV from the ds4bme_intro GitHub repository.
# NOTE(review): `df` is not referenced by any later cell in this notebook — confirm it is still needed.
df = pd.read_csv("https://raw.githubusercontent.com/bcaffo/ds4bme_intro/master/data/kirby127a_3_1_ax_283Labels_M2_corrected_stats.csv")
In [7]:
# Build the ROI hierarchy lookup table: one row per ROI with the name of its
# parent structure at each coarser level (level4 down to level1).
url = "https://raw.githubusercontent.com/bcaffo/MRIcloudT1volumetrics/master/inst/extdata/multilevel_lookup_table.txt"
multilevel_lookup = (
    pd.read_csv(url, sep="\t")
    # Level5 is dropped explicitly before the "modify*" columns are renamed.
    .drop(columns=['Level5'])
    .rename(columns={
        "modify": "roi",
        "modify.1": "level4",
        "modify.2": "level3",
        "modify.3": "level2",
        "modify.4": "level1",
    })
    # Keep only the renamed hierarchy columns, finest level first.
    .loc[:, ['roi', 'level4', 'level3', 'level2', 'level1']]
)
multilevel_lookup.head()
Out[7]:
roi level4 level3 level2 level1
0 SFG_L SFG_L Frontal_L CerebralCortex_L Telencephalon_L
1 SFG_R SFG_R Frontal_R CerebralCortex_R Telencephalon_R
2 SFG_PFC_L SFG_L Frontal_L CerebralCortex_L Telencephalon_L
3 SFG_PFC_R SFG_R Frontal_R CerebralCortex_R Telencephalon_R
4 SFG_pole_L SFG_L Frontal_L CerebralCortex_L Telencephalon_L
In [8]:
# Subject whose volumes are analysed. Named `subject_id` rather than `id` so the
# builtin `id()` is not shadowed for the rest of the kernel session.
subject_id = 127
subjectData = pd.read_csv("https://raw.githubusercontent.com/bcaffo/ds4bme_intro/master/data/kirby21.csv")
# Keep only this subject's rows with type == 1 and level == 5.
subjectData = subjectData.loc[
    (subjectData["type"] == 1)
    & (subjectData["level"] == 5)
    & (subjectData["id"] == subject_id)
]
subjectData = subjectData[['roi', 'volume']]
## Merge the subject data with the multilevel data
subjectData = pd.merge(subjectData, multilevel_lookup, on = "roi")
# Constant root label so every row shares a single top-level node.
subjectData = subjectData.assign(icv = "ICV")
# Each ROI's share of the total volume across the merged rows.
subjectData = subjectData.assign(comp = subjectData.volume / subjectData.volume.sum())
subjectData.head()
Out[8]:
roi volume level4 level3 level2 level1 icv comp
0 SFG_L 12926 SFG_L Frontal_L CerebralCortex_L Telencephalon_L ICV 0.009350
1 SFG_R 10050 SFG_R Frontal_R CerebralCortex_R Telencephalon_R ICV 0.007270
2 SFG_PFC_L 12783 SFG_L Frontal_L CerebralCortex_L Telencephalon_L ICV 0.009247
3 SFG_PFC_R 11507 SFG_R Frontal_R CerebralCortex_R Telencephalon_R ICV 0.008324
4 SFG_pole_L 3078 SFG_L Frontal_L CerebralCortex_L Telencephalon_L ICV 0.002227
In [9]:
# Sunburst of volume composition: the path runs from the ICV root through
# level1..level4 out to the individual ROI, sized by each ROI's `comp` share.
fig = px.sunburst(
    data_frame=subjectData,
    path=['icv', 'level1', 'level2', 'level3', 'level4', 'roi'],
    values='comp',
    width=800,
    height=800,
)
fig.show()
In [11]:
import pandas as pd
import plotly.express as px
import numpy as np
import plotly.graph_objects as go
In [12]:
# Load the ROI hierarchy lookup table (same URL and column renames as the
# earlier lookup cell in this notebook).
url = "https://raw.githubusercontent.com/bcaffo/MRIcloudT1volumetrics/master/inst/extdata/multilevel_lookup_table.txt"
multilevel_lookup = (
    pd.read_csv(url, sep="\t")
    .drop(columns=['Level5'])
    .rename(columns={
        "modify": "roi",
        "modify.1": "level4",
        "modify.2": "level3",
        "modify.3": "level2",
        "modify.4": "level1",
    })
)
In [13]:
# Keep only the hierarchy columns, ordered from finest (roi) to coarsest (level1).
multilevel_lookup = multilevel_lookup.loc[:, ['roi', 'level4', 'level3', 'level2', 'level1']]
In [14]:
# Subject whose volumes are analysed. Named `subject_id` rather than `id` so the
# builtin `id()` is not shadowed for the rest of the kernel session.
subject_id = 127
subjectData = pd.read_csv("https://raw.githubusercontent.com/bcaffo/ds4bme_intro/master/data/kirby21.csv")
# Keep only this subject's rows with type == 1 and level == 5.
subjectData = subjectData.loc[
    (subjectData["type"] == 1)
    & (subjectData["level"] == 5)
    & (subjectData["id"] == subject_id)
]
subjectData = subjectData[['roi', 'volume']]
# Attach each ROI's parent structures (level4..level1) from the lookup table.
subjectData = pd.merge(subjectData, multilevel_lookup, on = "roi")
# Constant root label so every row shares a single top-level node.
subjectData = subjectData.assign(icv = "ICV")
# Each ROI's share of the total volume across the merged rows.
subjectData = subjectData.assign(comp = subjectData.volume / subjectData.volume.sum())
subjectData.head()
Out[14]:
roi volume level4 level3 level2 level1 icv comp
0 SFG_L 12926 SFG_L Frontal_L CerebralCortex_L Telencephalon_L ICV 0.009350
1 SFG_R 10050 SFG_R Frontal_R CerebralCortex_R Telencephalon_R ICV 0.007270
2 SFG_PFC_L 12783 SFG_L Frontal_L CerebralCortex_L Telencephalon_L ICV 0.009247
3 SFG_PFC_R 11507 SFG_R Frontal_R CerebralCortex_R Telencephalon_R ICV 0.008324
4 SFG_pole_L 3078 SFG_L Frontal_L CerebralCortex_L Telencephalon_L ICV 0.002227
In [15]:
# Distinct (icv, level1, level2) combinations. Only the key columns are
# selected, so the aggregation has no value columns to average.
summary = (
    subjectData[["icv", "level1", "level2"]]
    .groupby(["icv", "level1", "level2"], as_index=False)
    .mean()
)
# Total volume for each level1 structure under the ICV root.
volume1_summary = (
    subjectData
    .groupby(["icv", "level1"], as_index=False)[["volume"]]
    .sum()
)
# Total volume for each level2 structure within its level1 parent.
volume2_summary = (
    subjectData
    .groupby(["icv", "level1", "level2"], as_index=False)[["volume"]]
    .sum()
)
In [16]:
# Flatten the summary table row by row and keep each label's first occurrence,
# giving the ordered list of Sankey node labels.
data_summary = pd.unique(summary.to_numpy().ravel()).tolist()
In [17]:
# Display the summed volume for each (icv, level1) group.
volume1_summary
Out[17]:
icv level1 volume
0 ICV CSF 109787
1 ICV Diencephalon_L 11817
2 ICV Diencephalon_R 11560
3 ICV Mesencephalon 10271
4 ICV Metencephalon 159411
5 ICV Myelencephalon 4975
6 ICV Telencephalon_L 531153
7 ICV Telencephalon_R 543444
In [18]:
# Display the summed volume for each (icv, level1, level2) group.
volume2_summary
Out[18]:
icv level1 level2 volume
0 ICV CSF Sulcus_L 33975
1 ICV CSF Sulcus_R 30018
2 ICV CSF Ventricle 44979
3 ICV CSF Ventricle 815
4 ICV Diencephalon_L BasalForebrain_L 5475
5 ICV Diencephalon_L Thalamus_L 6342
6 ICV Diencephalon_R BasalForebrain_R 5188
7 ICV Diencephalon_R Thalamus_R 6372
8 ICV Mesencephalon Mesencephalon_L 4945
9 ICV Mesencephalon Mesencephalon_R 5326
10 ICV Metencephalon Metencephalon_L 79498
11 ICV Metencephalon Metencephalon_R 79913
12 ICV Myelencephalon Myelencephalon_L 2403
13 ICV Myelencephalon Myelencephalon_R 2572
14 ICV Telencephalon_L CerebralCortex_L 276982
15 ICV Telencephalon_L CerebralNucli_L 12381
16 ICV Telencephalon_L WhiteMatter_L 241790
17 ICV Telencephalon_R CerebralCortex_R 282874
18 ICV Telencephalon_R CerebralNucli_R 13077
19 ICV Telencephalon_R WhiteMatter_R 247493
In [20]:
# Sankey diagram of volume flowing ICV -> level1 -> level2.
#
# Link endpoints and values are derived from the summary tables rather than
# typed by hand, so they always agree with volume1_summary / volume2_summary
# and with the node order in data_summary.

# Position of every node label in data_summary; Sankey links refer to nodes by index.
node_index = {label: position for position, label in enumerate(data_summary)}

# One row per link: ICV -> level1 rows first, then level1 -> level2 rows.
sankey_links = pd.concat(
    [
        volume1_summary.rename(columns={"icv": "source", "level1": "target"})[["source", "target", "volume"]],
        volume2_summary.rename(columns={"level1": "source", "level2": "target"})[["source", "target", "volume"]],
    ],
    ignore_index=True,
)

fig = go.Figure(data=[go.Sankey(
    node = dict(
        pad=15,
        thickness=15,
        line=dict(color="blue",width=0.5),
        label=data_summary
    ),
    link = dict(
        # Direct dict lookups raise KeyError if a label is missing from data_summary,
        # instead of silently producing an invalid node index.
        source = [node_index[label] for label in sankey_links["source"]],
        target = [node_index[label] for label in sankey_links["target"]],
        value = sankey_links["volume"].tolist(),
        # One label per link (not per node), so each link is named by its endpoints.
        label = (sankey_links["source"] + " -> " + sankey_links["target"]).tolist()
    ))])
In [21]:
# Title the Sankey figure, shrink the label font, then render it.
fig.update_layout(
    title_text="Sankey Diagram for MRICloud Data",
    font_size=9,
)
fig.show()
In [23]:
# Save the current figure as sankeyfig.html (relative path).
fig.write_html("sankeyfig.html")
In [24]:
import sqlite3
import pandas as pd
In [25]:
# Open the SQLite database file 'opioid.db' (relative path).
# NOTE(review): no conn.close() appears anywhere later in this notebook — consider
# closing the connection once the tables have been loaded.
conn = sqlite3.connect('opioid.db')
In [26]:
# Read each table in full into its own DataFrame over the shared connection.
population = pd.read_sql_query(sql='SELECT * FROM population', con=conn)
land = pd.read_sql_query(sql='SELECT * FROM land', con=conn)
annual = pd.read_sql_query(sql='SELECT * FROM annual', con=conn)
In [27]:
# Convert DOSAGE_UNIT to float so it can be averaged.
annual['DOSAGE_UNIT'] = annual['DOSAGE_UNIT'].astype(float)

# Mean dosage units for every (year, state) pair, as a flat three-column table.
avg_dose = (
    annual
    .groupby(['year', 'BUYER_STATE'])['DOSAGE_UNIT']
    .mean()
    .reset_index(name="meandose")
)
In [28]:
# Display the mean DOSAGE_UNIT per (year, BUYER_STATE).
avg_dose
Out[28]:
year BUYER_STATE meandose
0 2006 AE 3.300000e+02
1 2006 AK 7.833505e+05
2 2006 AL 2.602427e+06
3 2006 AR 1.180069e+06
4 2006 AZ 1.181490e+07
... ... ... ...
496 2014 VT 1.179762e+06
497 2014 WA 7.009620e+06
498 2014 WI 2.934918e+06
499 2014 WV 2.195396e+06
500 2014 WY 9.400874e+05

501 rows × 3 columns

In [29]:
# Scatter of mean dosage units per state, one point per (state, year), coloured by year.
visual = px.scatter(
    data_frame=avg_dose,
    x="BUYER_STATE",
    y="meandose",
    color="year",
    title="Avg Opioid Pills by State Through Time",
    labels={
        "meandose": "Mean Opioid Pills",
        "BUYER_STATE": "State",
        "year": "Year",
    },
)
In [30]:
# Render the scatter figure as this cell's output.
visual
In [31]:
# Save the scatter figure as opioid.html (relative path).
visual.write_html("opioid.html")
In [ ]: